clear;
clc;
close all;
addpath('../02_functions');

%------------------------------------------------------------------------%
% This file creates figure 9(b,d) in the paper 
%------------------------------------------------------------------------%

% Horizon shown on the x-axis of the figures, in units of 1000 contracts
% (the volume grid built further down runs from 0 to volume_length*1000).
volume_length           = 200;

% --------------------------------------------------------------------------------------

% Sample window, as MATLAB serial date numbers
start_date              = datenum(2007,1,5);
finish_date             = datenum(2020,12,29);

% Input files: per-minute trade data and volume-time data
data                    = load('../01_Data/es_trades_1minute_EST.mat');

% NOTE: the signed volume is simulated from a random walk, so extreme
% observations (beyond +/-10,000) such as those seen empirically are unlikely.
data_vol_time           = load('../01_Data/es_trades_vol_time.mat');

% Locate the sample window in each file.
% find_closest is a project helper; presumably it returns the index of the
% date nearest to the requested one - TODO confirm against 02_functions.
beg_TRADES              = find_closest(data.caldt_daily,start_date);
fin_TRADES              = find_closest(data.caldt_daily,finish_date);
rows_TRADES             = beg_TRADES:fin_TRADES;

beg_VOL_TIME            = find_closest(data_vol_time.caldt_daily,start_date);
fin_VOL_TIME            = find_closest(data_vol_time.caldt_daily,finish_date);
rows_VOL_TIME           = beg_VOL_TIME:fin_VOL_TIME;

% Per-minute data restricted to the window (rows are days)
caldt_daily             = data.caldt_daily(rows_TRADES);
volume                  = data.trade_quantity_per_min_mat(rows_TRADES,:);
signed_volume           = data.signed_vol_per_min_mat(rows_TRADES,:);

% Volume-time data restricted to the window (rows are days)
caldt_daily_VOL_TIME    = data_vol_time.caldt_daily(rows_VOL_TIME);
MID_mat                 = data_vol_time.midpoint_last(rows_VOL_TIME,:);
SV_voltime              = data_vol_time.signed_vol_per_vol_time(rows_VOL_TIME,:);
cum_SV_voltime          = data_vol_time.cum_signed_vol(rows_VOL_TIME,:);

% ----------------------------------------------------------------% 
% INTERSECT
% ----------------------------------------------------------------% 
% Keep only the dates present in both data sets. intersect returns the
% common dates in sorted order together with the matching row indices, so
% after this step row i refers to the same date in every array below.

[~,keep_TRADES,keep_VOL_TIME]       = intersect(caldt_daily,caldt_daily_VOL_TIME);

% per-minute data
caldt_daily                         = caldt_daily(keep_TRADES);
volume                              = volume(keep_TRADES,:);
signed_volume                       = signed_volume(keep_TRADES,:);

% volume-time data
caldt_daily_VOL_TIME                = caldt_daily_VOL_TIME(keep_VOL_TIME);
MID_mat                             = MID_mat(keep_VOL_TIME,:);
SV_voltime                          = SV_voltime(keep_VOL_TIME,:);
cum_SV_voltime                      = cum_SV_voltime(keep_VOL_TIME,:);

% Running totals along each row (i.e. within each day)
cum_volume                          = cumsum(volume,2);
cum_signed_volume                   = cumsum(signed_volume,2);

% Days whose total volume is below one million contracts
% (diagnostic only; not referenced again in this script)
indx_low                            = find(cum_volume(:,end) < 1000000);

% x-axis grid for the figures: 0, 1000, 2000, ..., volume_length*1000
unit_volume                         = 1000;
volume_indx                         = 0:unit_volume:volume_length*unit_volume;

% Log return between consecutive columns of the midpoint matrix
log_ret_int                         = diff(log(MID_mat),1,2);


%% Sort on closing t-1 signed volume and then plot the avg cum log returns in trade time:
% The relative signed volume (signed volume / volume) is also computed
% here, but the sort further down uses the raw closing signed volume.

% Number of days between the sorting signal and the outcome
lead = 1;

% Per-minute columns summed to obtain the "closing" quantities
sum_signedVol_start                                 = 1276;
sum_signedVol_end                                   = 1335;
close_cols                                          = sum_signedVol_start:sum_signedVol_end;

% ---- Lagged version: signal from day t-lead, outcome from day t ---------

% signal (drop the last `lead` days)
close_signed_volume_lead                            = sum(signed_volume(1:end-lead,close_cols),2);
close_volume_lead                                   = sum(volume(1:end-lead,close_cols),2);

% days with fewer than 100 contracts in the closing window get NaN volume,
% which makes their relative signed volume NaN as well
indx_no_trading_EOD_volume_lead                     = find(close_volume_lead < 100);
close_volume_lead(indx_no_trading_EOD_volume_lead)  = nan;
close_rel_signed_volume_lead                        = close_signed_volume_lead./close_volume_lead;

% outcomes (drop the first `lead` days)
caldt_true_lead                                     = caldt_daily(1+lead:end);
log_ret_int_lead                                    = log_ret_int(1+lead:end,:);
signed_volume_lead                                  = SV_voltime(1+lead:end,:);
cum_signed_volume_lead                              = cum_SV_voltime(1+lead:end,:);

% ---- Contemporaneous version: signal and outcome on the same day --------

caldt_true                                          = caldt_daily;
close_signed_volume                                 = sum(signed_volume(:,close_cols),2);
close_volume                                        = sum(volume(:,close_cols),2);

indx_no_trading_EOD_volume                          = find(close_volume < 100);
close_volume(indx_no_trading_EOD_volume)            = nan;
close_rel_signed_volume                             = close_signed_volume./close_volume;

% ----------------------------------------------------------------% 
% Sorts
% ----------------------------------------------------------------% 

% Bucket edges for the closing signed volume. The two outer edges are
% presumably just large enough to act as open-ended bounds.
portfolios              = [-1000000000000000 , -10000 , 0, 10000 ,100000000000000000];

% number of buckets = number of edges - 1
NN                      = numel(portfolios)-1;

% sort_High_Low_fixed is a project helper; from its use below it returns
% two cell arrays (dates and signal values) indexed by bucket.
% lagged signal
[caldt_close_signedVol_sorted_lead , closing_signedVol_sorted_lead]         = sort_High_Low_fixed(caldt_true_lead,close_signed_volume_lead,portfolios);
% contemporaneous signal
[caldt_close_signedVol_sorted , closing_signedVol_sorted]                   = sort_High_Low_fixed(caldt_true,close_signed_volume,portfolios);

% Check: echo the smallest and largest signal value in every bucket
for p = 1 : NN
    bucket_values = closing_signedVol_sorted_lead{p,1};
    min(bucket_values)
    max(bucket_values)
end

% Share (in %) of strictly positive signal values in bucket 2 (echoed)
percentage = 100*nnz(closing_signedVol_sorted_lead{2,1} > 0)/length(closing_signedVol_sorted_lead{2,1})

% ----------------------------------------------------------------% 
% find high ... low date intersections
% ----------------------------------------------------------------% 
% For every bucket, match its dates back to the daily sample and collect
% the corresponding rows of the outcome matrices.
% Column 1 of log_return_int_sorted holds same-day returns, column 2 holds
% the returns of the day following the signal.

closing_SV_sorted               = cell(NN,1);
caldt_daily_CTC_sorted          = cell(NN,1);
caldt_daily_CTC_sorted_lead     = cell(NN,1);
log_return_int_sorted           = cell(NN,2);
signed_volume_sorted            = cell(NN,1);
cum_signed_volume_sorted        = cell(NN,1);

% ---- contemporaneous ----
for p = 1 : NN

    % in_bucket indexes the bucket's own arrays, in_sample the daily sample
    [~,in_bucket,in_sample]                 = intersect(caldt_close_signedVol_sorted{p,1},caldt_true);

    caldt_close_signedVol_sorted{p,1}       = caldt_close_signedVol_sorted{p,1}(in_bucket,:);
    closing_signedVol_sorted{p,1}           = closing_signedVol_sorted{p,1}(in_bucket,:);

    closing_SV_sorted{p,1}                  = close_signed_volume(in_sample,:);
    caldt_daily_CTC_sorted{p,1}             = caldt_true(in_sample);
    log_return_int_sorted{p,1}              = log_ret_int(in_sample,:);
end

% Echo the average closing signed volume of the first three buckets
abc     =   nanmean(closing_SV_sorted{1,1})
abc2    =   nanmean(closing_SV_sorted{2,1})
abc3    =   nanmean(closing_SV_sorted{3,1})

% ---- next day ----
for p = 1 : NN

    [~,in_bucket,in_sample]                     = intersect(caldt_close_signedVol_sorted_lead{p,1},caldt_true_lead);

    caldt_close_signedVol_sorted_lead{p,1}      = caldt_close_signedVol_sorted_lead{p,1}(in_bucket,:);
    closing_signedVol_sorted_lead{p,1}          = closing_signedVol_sorted_lead{p,1}(in_bucket,:);
    caldt_daily_CTC_sorted_lead{p,1}            = caldt_true_lead(in_sample);

    log_return_int_sorted{p,2}                  = log_ret_int_lead(in_sample,:);
    signed_volume_sorted{p,1}                   = signed_volume_lead(in_sample,:);
    cum_signed_volume_sorted{p,1}               = cum_signed_volume_lead(in_sample,:);
end


% ----------------------------------------------------------------% 
% cumulate for the drift
% ----------------------------------------------------------------%
% For every bucket p build the average intraday path across its days:
%   cum_logrets_sorted{p,1}          same-day cumulative log return
%   cum_logrets_sorted{p,2}          next-day cumulative log return
%   mu_cum_signed_volume_sorted{p,1} next-day cumulative signed volume
% Each path is a row vector with a leading 0 so that it starts at the origin.
% Returns are scaled by 100*252 (percent; 252 is presumably the number of
% trading days used to annualize).
%
% The averages are taken explicitly along dimension 1 (across days).
% Without the dimension argument nanmean works along the first
% non-singleton dimension, so a bucket holding a single day (a row vector)
% would collapse to one scalar and the path could no longer be indexed by
% the volume grid when plotting.
%
% Note: cumsum propagates NaN along a row, so a day with a missing
% interval return drops out of the average from that column onwards.

for p = 1 : NN
    cum_logrets_sorted{p,1}                 = [0 , 100*252*nanmean(cumsum(log_return_int_sorted{p,1},2),1)];
    cum_logrets_sorted{p,2}                 = [0 , 100*252*nanmean(cumsum(log_return_int_sorted{p,2},2),1)];
    mu_cum_signed_volume_sorted{p,1}        = [0 , nanmean(cum_signed_volume_sorted{p,1},1)];
end

% ---------------------------------------------------------------------------------- %
% Figures: average next-day paths per bucket, plotted against volume time.

FS              = 18;
k               = 2; % next day

% Integer bounds enclosing all next-day return curves (and zero).
% temp_min / temp_max are echoed to the console for every bucket.
y_min           = 0;
y_max           = 0;

for p = 1 : NN
    temp_min            = floor(min(cum_logrets_sorted{p,k}))
    y_min               = min(temp_min,y_min);

    temp_max            = ceil(max(cum_logrets_sorted{p,k}))
    y_max               = max(temp_max,y_max);
end

YY                      = [y_min y_max  y_max y_min  ];

% Shared ingredients of the two figures
n_pts           = length(volume_indx);          % curves are cut to the volume grid
zero_line       = zeros(1,n_pts);
% NOTE(review): the labels assume bucket 1 is the most negative signed
% volume and bucket NN the most positive - confirm against the ordering
% returned by sort_High_Low_fixed.
bucket_labels   = {'SV<-10000','SV \in [-10000,0)', 'SV \in [0,10000]' , 'SV > 10000'};

% Panel 1: cumulative log returns, panel 2: cumulative signed volume.
% NOTE(review): the first y-label mentions 5-min returns although the
% returns come from the volume-time midpoint matrix - confirm the wording.
panel_curves    = {cum_logrets_sorted(:,k) , mu_cum_signed_volume_sorted(:,1)};
panel_ylabels   = {'Ann. avg. 5-min cum log return (in %, line)' , 'Signed Volume (# contracts)'};

for f = 1 : numel(panel_curves)
    figure
    hold on

    % one line per bucket
    for p = 1 : NN
        plot(volume_indx,panel_curves{f}{p}(1:n_pts),'LineWidth',4);
    end

    % zero reference line (not listed in the legend)
    plot(volume_indx,zero_line,'LineWidth',2,'color','black')

    legend(bucket_labels{:},'Location','Northwest')
    ylabel(panel_ylabels{f})
    xlabel('Volume Time (#contracts)' )

    % relabel the ticks with their plain numeric values
    set(gca, 'XTickLabel', get(gca, 'XTick'));
end

